********************************************************************
***** DATA CREATION OF ARTICLE INFLUENCE SCORE:  GENDER AND COLLABORATION (LORENZO DUCTOR, SANJEEV GOYAL AND ANJA PRUMMER)
***** CREATED BY: Lorenzo Ductor (lductor@ugr.es)   
***** OBJECTIVE: do file to create the citation matrices needed to compute the 5-year article influence score
***** Input: 
***** i) "100JournalsDataset.dta": citations from journal i to journal j for 100 journals from 1970 to 2017 
***** Output:
***** i) "citationMatrix`i'.xls" cross-citation matrix for each year i needed to compute the eigenfactor and article influence score 
***** ii) "articlesperJournal.xls" includes the number of articles per journal needed to compute the article influence score
********************************************************************

/********************5-year Article Influence Score****************************/

*First step open the main data file and erase financial journals
*Open the main data file
use "100JournalsDataset.dta", clear


*Step 2: Calculate the number of articles per year

*This is an adjustment to identify articles cited by others, but for whom there was no information in WoS
drop v1 v2 v3
rename v4 v1
rename v5 v2
rename v6 v3
append using 100JournalsDatasetv2_Stata13b.dta
drop v6 v4 v5
duplicates drop

*Assign a value of 1 if the article was generated between a given year and 5 years before.
forval i = 1970/2017{
gen article`i'=1 if `i'-v3<=4 & `i'-v3>=0 
}
*Sum to add up the number of articles
collapse (sum) article1974-article2017, by(v2)
reshape long article, i(v2) j(year)
keep if article>=5
export excel using "articlesperJournal.xls", replace firstrow(varlabels)

*Step 2: Calculate the citations from i to j
use "100JournalsDatasetv2_Stata13b", clear
*Consider only citations with 5 years of difference at most
keep if v3-v6<=4 & v3-v6>=0
gen numCitations=1
collapse (sum) numCitations, by(v3 v5 v2)
rename v2 columns
rename v5 rows
rename v3 year
*Save an excel file per year
forval i = 1970/2017{
preserve
keep if year==`i'
export excel using "citationMatrix`i'.xls", replace firstrow(varlabels)
restore
}

import delimited "eigenFactorDataLag5.csv", clear /*Obtained after running Eigenfactor Builder.R in R*/
rename v5 journals
rename v3 year
replace journals="J MONETARY ECON" if journals=="CARN ROCH CONF SERIE"
save ais, replace

use journalid, clear
rename jcrabbreviatedtitle journals
/*Renaming journals before merging the WoS journals with the EconLit journals*/
replace journals="CAMBRIDGE J ECON" if journals=="CAMB J ECON"
replace journals="CAN J ECONOMICS" if journals=="CAN J ECON"
replace journals="ENERGY J" if journals=="ENERG J"
replace journals="J BUS" if journal=="Journal of Business"
replace journals="J BUS VENTURING" if journal=="Journal of Business"

joinby journals using ais, unmatched(both)
/*Journal of Business Venturing missing in the Econlit*/
keep if _merge==3
save ais, replace
